#### prepare a package ####
# library() stops with an error when lfe is not installed. require() would only
# warn and return FALSE, and the script would then fail later at the first
# felm() call with a less helpful message.
library(lfe)

#### load data ####
# data of the 2017 UTokyo-Asahi Voter Survey
UTAVS.url <- "http://www.masaki.j.u-tokyo.ac.jp/utas/2017UTASV20200326.csv"
UTAVS.file <- "2017UTASV20200326.csv"

# Download only when no local copy exists, so reruns work offline and do not
# overwrite the file. The download goes to a temporary name first; an
# interrupted transfer therefore never leaves a truncated file under the final
# name, and the next run simply tries again.
if (!file.exists(UTAVS.file)) {
  UTAVS.partial <- paste0(UTAVS.file, ".part")
  download.status <- download.file(UTAVS.url, UTAVS.partial)
  # download.file() returns 0 on success
  if (download.status != 0 || !file.rename(UTAVS.partial, UTAVS.file)) {
    stop("could not download ", UTAVS.url, call. = FALSE)
  }
}

# load the data
UTAVS.data <- read.csv(UTAVS.file)

## prepare variables
# gender: F1 shifted down by one; code 99 is set to missing
gender.recoded <- UTAVS.data$F1 - 1
gender.recoded[UTAVS.data$F1 == 99] <- NA
UTAVS.data$gender <- gender.recoded

# age: F2 shifted down by one, except that code 1 stays 1 (so codes 1 and 2
# share the value 1); code 99 is set to missing
age.code <- UTAVS.data$F2
age.recoded <- age.code - 1
age.recoded[age.code == 1] <- 1
age.recoded[age.code == 99] <- NA
UTAVS.data$age <- age.recoded

# indicator variables of no opinion
# (1 = answer code 3 or code 99, 0 = any other answer)

# constitution: code 99 on Q26_1 counts only when Q26_2_1 and Q26_3_1 are both
# above 11. The column name keeps the spelling "constition" used by later code.
constitution.unanswered <- UTAVS.data$Q26_1 == 99 &
  UTAVS.data$Q26_2_1 > 11 &
  UTAVS.data$Q26_3_1 > 11
UTAVS.data$constition.NO <- as.numeric(UTAVS.data$Q26_1 == 3 |
                                         constitution.unanswered)

# remaining issues: one survey item each, listed in the order in which the
# indicator columns are added to UTAVS.data
no.opinion.items <- c(defense   = "Q23_1",
                      yasukuni  = "Q23_5",
                      surname   = "Q23_14",
                      samesex   = "Q23_15",
                      immigrant = "Q23_12",
                      equality  = "Q24_1",
                      tax       = "Q23_10")
UTAVS.data[paste0(names(no.opinion.items), ".NO")] <-
  lapply(no.opinion.items, function(item) {
    answer <- UTAVS.data[[item]]
    as.numeric(answer == 3 | answer == 99)
  })

# indicator variables of middle-category responses
# (1 = answer code 3, 0 = any other answer, NA = code 99)

# constitution: set to NA only when Q26_1 is 99 and Q26_2_1 and Q26_3_1 are
# both above 11. The column name keeps the spelling "constition" used by later
# code.
constitution.no.answer <- UTAVS.data$Q26_1 == 99 &
  UTAVS.data$Q26_2_1 > 11 &
  UTAVS.data$Q26_3_1 > 11
UTAVS.data$constition.middle <- ifelse(constitution.no.answer, NA,
                                       as.numeric(UTAVS.data$Q26_1 == 3))

# remaining issues: one survey item each, listed in the order in which the
# indicator columns are added to UTAVS.data
middle.items <- c(defense   = "Q23_1",
                  yasukuni  = "Q23_5",
                  surname   = "Q23_14",
                  samesex   = "Q23_15",
                  immigrant = "Q23_12",
                  equality  = "Q24_1",
                  tax       = "Q23_10")
UTAVS.data[paste0(names(middle.items), ".middle")] <-
  lapply(middle.items, function(item) {
    answer <- UTAVS.data[[item]]
    ifelse(answer == 99, NA, as.numeric(answer == 3))
  })

# number of respondents
N <- nrow(UTAVS.data)

## create long format data: one row per respondent and issue
# Issues in stacking order. issue.stems are the prefixes of the indicator
# columns in UTAVS.data (the constitution columns are spelled "constition").
issue.labels <- c("constitution", "defense", "yasukuni", "surname",
                  "samesex", "immigrant", "equality", "tax")
issue.stems <- c("constition", "defense", "yasukuni", "surname",
                 "samesex", "immigrant", "equality", "tax")
area.labels <- c("security", "social", "economic")
n.issues <- length(issue.labels)

# concatenate the eight indicator columns with the given suffix, issue by issue
stack.columns <- function(suffix) {
  unlist(UTAVS.data[paste0(issue.stems, suffix)], use.names = FALSE)
}

long.UTAVS.data <- data.frame(
  # respondent-level variables are repeated once per issue
  id = rep(UTAVS.data$ID, times = n.issues),
  gender = rep(UTAVS.data$gender, times = n.issues),
  age = rep(UTAVS.data$age, times = n.issues),
  no.opinion = stack.columns(".NO"),
  middle = stack.columns(".middle"),
  issue.label = factor(rep(issue.labels, each = N), levels = issue.labels),
  # first three issues = security, next three = social, last two = economic
  issue.area = factor(rep(area.labels, times = c(3, 3, 2) * N),
                      levels = area.labels)
)

# long format data without "visit to the Yasukuni Shrine" and "separate surnames"
excluded.issues <- c("yasukuni", "surname")
keep.row <- !(long.UTAVS.data$issue.label %in% excluded.issues)
long.UTAVS.data.limited <- long.UTAVS.data[keep.row, , drop = FALSE]

#### analysis ####
## dependent variable = middle category + non-response
# felm formula parts: outcome ~ covariates | fixed effects | IV part | cluster.
# Here: issue.label fixed effects, no IV part (0), clustering on respondent id.

# all issues
NO.result.1 <- felm(no.opinion ~ age : issue.area + gender |
                      issue.label | 0 | id, data = long.UTAVS.data)

# without "visit to the Yasukuni Shrine" and "separate surnames"
NO.result.2 <- felm(no.opinion ~ age : issue.area + gender |
                      issue.label | 0 | id, data = long.UTAVS.data.limited)

# Table A.2(a)
# Coefficient rows 2-4 are printed first, then row 1.
round(summary(NO.result.1)$coefficients[c(2:4, 1), ], 3)
# Rows used in the fit = all rows minus those listed in na.action. setdiff() is
# used instead of negative indexing because data[-NULL, ] selects no rows at
# all, which would report 0 respondents whenever nothing was dropped.
NO.kept.1 <- setdiff(seq_len(nrow(long.UTAVS.data)), NO.result.1$na.action)
length(unique(long.UTAVS.data[NO.kept.1, "id"]))  # number of respondents
NO.result.1$N  # number of responses

round(summary(NO.result.2)$coefficients[c(2:4, 1), ], 3)
NO.kept.2 <- setdiff(seq_len(nrow(long.UTAVS.data.limited)),
                     NO.result.2$na.action)
length(unique(long.UTAVS.data.limited[NO.kept.2, "id"]))  # number of respondents
NO.result.2$N  # number of responses

# significance test for the difference between "age:issue.areasecurity" and "age:issue.areasocial"
# Adding the age main effect makes the age-by-area terms contrasts against the
# first issue.area level ("security").
NO.result.suppl <- felm(no.opinion ~ age + age : issue.area + gender |
                          issue.label | 0 | id, data = long.UTAVS.data)
round(summary(NO.result.suppl)$coefficients, 3)  # see "age:issue.areasocial" in this table

## dependent variable = only middle category
# felm formula parts: outcome ~ covariates | fixed effects | IV part | cluster.
# Here: issue.label fixed effects, no IV part (0), clustering on respondent id.

# all issues
middle.result.1 <- felm(middle ~ age : issue.area + gender |
                          issue.label | 0 | id, data = long.UTAVS.data)

# without "visit to the Yasukuni Shrine" and "separate surnames"
middle.result.2 <- felm(middle ~ age : issue.area + gender |
                          issue.label | 0 | id, data = long.UTAVS.data.limited)

# Table A.2(b)
# Coefficient rows 2-4 are printed first, then row 1.
round(summary(middle.result.1)$coefficients[c(2:4, 1), ], 3)
# Rows used in the fit = all rows minus those listed in na.action. setdiff() is
# used instead of negative indexing because data[-NULL, ] selects no rows at
# all, which would report 0 respondents whenever nothing was dropped.
middle.kept.1 <- setdiff(seq_len(nrow(long.UTAVS.data)),
                         middle.result.1$na.action)
length(unique(long.UTAVS.data[middle.kept.1, "id"]))  # number of respondents
middle.result.1$N  # number of responses

round(summary(middle.result.2)$coefficients[c(2:4, 1), ], 3)
middle.kept.2 <- setdiff(seq_len(nrow(long.UTAVS.data.limited)),
                         middle.result.2$na.action)
length(unique(long.UTAVS.data.limited[middle.kept.2, "id"]))  # number of respondents
middle.result.2$N  # number of responses